import urllib,gzip,re,requests,csv

# This script does not support pagination (only the first results page is scraped).


# Fetch the first page of rental listings from 58.com.
url = 'https://bj.58.com/dashanzi/chuzu/pn1/?ClickID=1'
# timeout so a dead connection raises instead of hanging the script forever
res = requests.get(url, timeout=10)
html = res.content.decode('utf-8')

# Extract the listing <ul>; raw strings so backslash classes reach the regex engine intact.
ul = re.findall(r'<ul class="listUl">([\s\S]*?)</ul>', html)
if not ul:
    # Fail loudly with a meaningful message instead of an opaque IndexError below.
    raise RuntimeError('no <ul class="listUl"> found - 58.com page layout may have changed')
li_list = re.findall(r'<li[\s\S]*?>([\s\S]*?)</li>', ul[0])

# Pattern capturing (image URL, title, room layout, price) from one <li> fragment.
# Compiled once outside the loop; non-greedy (.*?) so a capture cannot swallow
# trailing HTML on the same line (the original greedy (.*) could).
item_pattern = re.compile(
    r'<div class="img_list">[\s\S]*?<img[\s\S]*?src="(.*?)">[\s\S]*?</div>[\s\S]*?'
    r'<div class="des">[\s\S]*?<a[\s\S]*?class="strongbox"[\s\S]*?>([\s\S]*?)</a>[\s\S]*?'
    r'<p class="room strongbox">([\s\S]*?)&nbsp;&nbsp;&nbsp;&nbsp;[\s\S]*?</p>[\s\S]*?</div>[\s\S]*?'
    r'<div class="listliright">[\s\S]*?<b class="strongbox">(.*?)</b>[\s\S]*?</div>'
)
for key, fragment in enumerate(li_list):
    # Flatten every captured group of every match into one stripped field list.
    li_list[key] = [
        field.strip()
        for match in item_pattern.findall(fragment)
        for field in match
    ]

# Write the scraped rows out as CSV.
# - 'a' (append) is kept from the original: repeated runs accumulate rows, and
#   the header row is re-written on every run (pre-existing behavior).
# - newline='' is required by the csv module to avoid blank lines on Windows.
# - `with` guarantees the file is flushed and closed (the original leaked the handle).
with open('58.csv', 'a', newline='') as out:
    csv_write = csv.writer(out, dialect='excel')
    # Header columns: image URL, title, room layout, price.
    csv_write.writerow(['图片地址', '标题', '户型', '价格'])
    for row in li_list:
        csv_write.writerow(row)
print("58.csv write over")